import io
import traceback
import requests
from PyPDF2 import PdfFileReader

# Check whether a file, or a byte stream, is a valid PDF.

# Browser-like User-Agent header sent with the sample download below.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
}

# Download a sample PDF whose body is later validated as a byte stream.
# requests applies no timeout by default, so an explicit one (in seconds)
# keeps the script from hanging indefinitely on an unresponsive server.
response = requests.get(
    'https://file.toushivip.com/annex-firm/gn_auto_doc_bonds/2021/12/29/7229c79a-684e-11ec-935f-842b2b0cacce.pdf',
    headers=headers,
    timeout=30)


def isValidPDF_bytes(pdfBytes):
    """
    Report whether ``pdfBytes`` parses as a PDF with at least one page.

    The bytes are wrapped in an ``io.BytesIO`` so ``PdfFileReader`` can read
    them like a file. The page count is printed to stdout. If reading fails,
    the traceback is printed (prefixed with ``*``) and the data is reported
    as invalid.

    Args:
        pdfBytes: Raw content of the candidate PDF, as ``bytes``.

    Returns:
        ``True`` if the reader reports one or more pages; ``False`` if it
        reports none or if reading raised an exception.
    """
    try:
        reader = PdfFileReader(io.BytesIO(pdfBytes))
        # Query the page count once and reuse it for both the diagnostic
        # print and the validity check.
        page_count = reader.getNumPages()
        print(page_count)
        # Further validation: a readable PDF must have at least one page.
        return page_count >= 1
    except Exception:
        # Any read failure means "not a valid PDF". Catching Exception
        # instead of using a bare except lets KeyboardInterrupt and
        # SystemExit propagate to the caller.
        print('*' + traceback.format_exc())
        return False


# Validate the downloaded response body as PDF bytes and print the verdict.
pdfBytes = response.content
print(isValidPDF_bytes(pdfBytes))


def is_pdf_file(pdfFilePath):
    """
    Report whether the PDF at ``pdfFilePath`` is readable and has pages.

    Args:
        pdfFilePath: Location of the PDF file; passed directly to
            ``PdfFileReader``.

    Returns:
        ``True`` if the reader reports one or more pages; ``False`` if it
        reports none or if opening/reading raised an exception.
    """
    try:
        reader = PdfFileReader(pdfFilePath)
        # Further validation: a readable PDF must have at least one page.
        return reader.getNumPages() >= 1
    except Exception:
        # Any failure (missing file, malformed data, ...) means "not a valid
        # PDF". Catching Exception instead of using a bare except lets
        # KeyboardInterrupt and SystemExit propagate to the caller.
        return False


if __name__ == '__main__':
    # Check a local sample file by path. is_pdf_file is the path-based
    # checker defined in this module; no isValidPDF_pathfile exists here,
    # so calling that name would raise NameError.
    print(is_pdf_file('./a.pdf'))
    # Alternative: pass an open binary file object instead of a path
    # (presumably also accepted by PdfFileReader -- confirm before enabling).
    # print(is_pdf_file(open('./a.pdf', 'rb')))
